library(tidyverse)

# Read the VCF as a delimited table. Lines beginning with '##' are treated as
# comments and skipped, so the first remaining line is used as the header.
# The delimiter is stated explicitly instead of being guessed: the per-sample
# fields contain ':' (and may contain '|'), which are themselves candidate
# delimiters for readr's guesser.
# NOTE(review): assumes the file is tab-delimited as in standard VCF — confirm.
allen24 <- read_delim(
  '/home/supervisor/mist2/gjsx/allentoft2024eurasia/i2t.vcf',
  delim = '\t',
  comment = '##'
)

# Interactive inspection of the INFO and FORMAT columns.
allen24$INFO

allen24$FORMAT

# Reshape the per-sample columns (column 10 onward) into long form: one row
# per sample per input row, with the raw field string stored in `value`.
# Entries equal to the all-missing call './.:.:.:.' are discarded; because
# `!=` yields NA for NA inputs, NA entries are discarded as well.
# NOTE(review): assumes columns 1-9 are the fixed VCF fields — confirm.
allen24_i2t <- allen24 |>
  select(10:last_col()) |>
  pivot_longer(
    cols = everything(),
    names_to = 'sample_id',
    values_to = 'value'
  ) |>
  filter(value != './.:.:.:.')

# Attach population labels to every retained sample entry, then split the
# colon-separated sample field into its four components.
# right_join() keeps every row of allen24_i2t, so samples missing from the
# population table are retained with NA in the population columns.
# The join key is named explicitly so the join cannot silently pick up
# additional shared column names (and the "Joining with `by = ...`" message
# is avoided).
# NOTE(review): assumes the TSV has a `sample_id` column with one row per
# sample — confirm; duplicate ids would multiply rows.
pop24_it <- read_tsv('mission/allentoft2024_sample_pop.tsv') |>
  right_join(allen24_i2t, by = 'sample_id') |>
  separate_wider_delim(
    value,
    delim = ':',
    names = c('genotype', 'dosage', 'posterior', 'haplo')
  )

# Drop excluded populations and derive per-row covariates from `pop_id`.
#   location: the leading run of ASCII letters in pop_id.
#   time:     the mean of every digit run that is immediately followed by
#             "BP" in pop_id (NaN when pop_id contains no such run).
#   dosage:   the dosage field converted from character to numeric.
# `pop_id != 'exclude'` evaluates to NA for NA pop_id, so rows without a
# population label are dropped here too.
# The per-row mean is computed with map_dbl() over the list returned by
# str_extract_all() rather than via rowwise(): same values, but vectorised and
# without leaving the result row-wise grouped for later steps.
# NOTE(review): presumably pop_id looks like "<Region>_<from>BP_<to>BP" —
# confirm against the population table.
pop24_it <- pop24_it |>
  filter(pop_id != 'exclude') |>
  mutate(
    location = str_extract(pop_id, '^[a-zA-Z]+'),
    time = str_extract_all(pop_id, '\\d+(?=BP)') |>
      map_dbl(\(bp) mean(as.numeric(bp))),
    dosage = as.numeric(dosage)
  )

# Mean dosage at each distinct `time` value. Only `fun` is supplied, so a
# point geom is requested explicitly: the default pointrange geom also expects
# ymin/ymax and would warn about the missing range.
# coord_flip() places time on the vertical axis.
pop24_it |>
  ggplot(aes(time, dosage)) +
  stat_summary(fun = 'mean', geom = 'point') +
  coord_flip()

# Number of rows per location, most frequent first.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
pop24_it |>
  count(location, sort = TRUE)

# NOTE(review): filter() with no conditions keeps every row, so this only
# displays pop24_it unchanged — looks like an unfinished query.
pop24_it |>
  filter()
